set scheme s1mono
graph set window fontface "Garamond"

global size size(large)
global ls labsize(medlarge)
global legs medlarge

*>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
* SET GLOBAL $PATHS
*>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
global root = "INSERT CUSTOMIZED PATH"
global data	 = "$root/data"
global figures  = "$root/figsandtabs/"
*>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>

use "$data/temp/SOEP1984_2016_short.dta", clear

*Abi is high ed
*Real is intermediate Ed
** put it back to long form to estimate residual earnings; 

/*
labgro: individual monthly gross wage
labnet: individual monthly net wage
o_eqpost: equivalized individual annual post-transfer income 
(meaning pension or welfare income flows are considered and total household income 
is divided by the square root of the household members such
that non-working household members are also assigned income).
o_indtotal: individual annual total income
o_indwealth: individual wealth
o_hhwealth: equivalized household wealth
*/

ren syear year
tsset pid year

global min = 25
global pol = "age age_2"
global poln = "2nd_order"
global maxage = 85

gen lwage = ln(o_eqpostII)

keep if civilservant==0

ren Abi Abi_s
bysort pid: egen Abi = max(Abi_s)
bysort pid: egen Real = max(Realschule)
bysort pid: egen Fan = max(Fachhochschule)

gen maxEd = 10 if Real==1 & Abi==0 & Fan==0
replace maxEd = 13 if Abi==1 

global break_abi = 56
global break_rea = 56
gen age_2 = age*age
gen age_b1 = (age-$break_abi)*(age>=$break_abi) if maxEd==13
replace age_b1 = (age-$break_rea)*(age>=$break_rea) if maxEd==10

gen age_b1_sq = (age-$break_abi)^2*(age>=$break_abi) if maxEd==13
replace age_b1_sq = (age-$break_rea)^2*(age>=$break_rea)^2 if maxEd==10

global break_abi_2 = 70
global break_rea_2 = 70

gen age_b2 = (age-$break_abi_2)*(age>=$break_abi_2) if maxEd==13
replace age_b2 = (age-$break_rea_2)*(age>=$break_rea_2) if maxEd==10

gen age_b2_sq = (age-$break_abi_2)^2*(age>=$break_abi_2)^2 if maxEd==13
replace age_b2_sq = (age-$break_rea_2)^2*(age>=$break_rea_2)^2 if maxEd==10

areg lwage age age_2  age_b1 age_b1_sq age_b2 age_b2_sq if maxEd==10 & age>=25, abs(pid) 
predict pred_rea if e(sample), xb
gen resid_rea = lwage-pred_rea
qui summ resid_rea
global s_rea = r(sd)

areg lwage age age_2  age_b1 age_b1_sq age_b2 age_b2_sq if maxEd==13 & age>=25, abs(pid) 
predict pred_abi if e(sample), xb
gen resid_abi = lwage-pred_abi
qui summ resid_abi
global s_abi = r(sd)

areg lwage age age_2  age_b1 age_b1_sq age_b2 age_b2_sq if  PrivateHI==1 & age>=25, abs(pid) 
predict pred_phi if e(sample), xb

areg lwage age age_2  age_b1 age_b1_sq age_b2 age_b2_sq if age>=25, abs(pid) 
predict pred_avg if e(sample), xb

reg lwage i.age if maxEd==13 & age>=25
global overall_c_abi =_b[_cons]
reg lwage i.age i.year if maxEd==13 & age>=25
global c_1984_abi = _b[_cons]
global c_2006_abi = _b[2006.year]
global c_2007_abi = _b[2007.year]
global c_2008_abi = _b[2008.year]
global c_2009_abi = _b[2009.year]
global c_2010_abi = _b[2010.year]
global c_2011_abi = _b[2011.year]

global fix_abi = $c_1984_abi+1/6*($c_2006_abi+$c_2007_abi+$c_2008_abi+$c_2009_abi+$c_2010_abi+$c_2011_abi)-$overall_c_abi


reg lwage i.age if maxEd==10 & age>=25
global overall_c_rea =_b[_cons]
reg lwage i.age i.year if maxEd==10 & age>=25
global c_1984_rea = _b[_cons]
global c_2006_rea = _b[2006.year]
global c_2007_rea = _b[2007.year]
global c_2008_rea = _b[2008.year]
global c_2009_rea = _b[2009.year]
global c_2010_rea = _b[2010.year]
global c_2011_rea = _b[2011.year]

global fix_rea = $c_1984_rea+1/6*($c_2006_rea+$c_2007_rea+$c_2008_rea+$c_2009_rea+$c_2010_rea+$c_2011_rea)-$overall_c_rea


*******************************
** plot age residuals
*******************************

areg lwage i.age if maxEd==10 & age>=25, abs(pid) 
matrix A_rea = e(b)
matrix A_rea = A_rea'

areg lwage i.age if maxEd==13 & age>=25, abs(pid) 
matrix A_abi = e(b)
matrix A_abi = A_abi'

areg lwage i.age if PrivateHI==1 & age>=25, abs(pid) 
matrix A_phi = e(b)
matrix A_phi = A_phi'

areg lwage i.age if age>=25, abs(pid) 
matrix A_avg = e(b)
matrix A_avg = A_avg'

keep if age>=25
collapse (mean) pred_rea pred_abi pred_phi pred_avg, by(age)
svmat A_rea
summ A_rea
replace A_rea = A_rea + r(max)
summ age if A_rea!=.
replace A_rea = . if age==r(max)

svmat A_abi
summ A_abi
replace A_abi = A_abi + r(max)
summ age if A_abi!=.
replace A_abi = . if age==r(max)

svmat A_phi
summ A_phi
replace A_phi = A_phi + r(max)
summ age if A_phi!=.
replace A_phi = . if age==r(max)

svmat A_avg
summ A_avg
replace A_avg = A_avg + r(max)
summ age if A_avg!=.
replace A_avg = . if age==r(max)

line pred_abi pred_rea A_abi A_rea age if age>=25 & age<=94, xlab(, $ls) ylab(, $ls) xtitle("Age", $size) ytitle("log(income)", $size) lpattern(solid solid dash dash) lcolor(black gs6 black gs6) legend(ring(0) position(4) size($legs) label(1 "Ed 13, fitted") label(2 "Ed 10, fitted") label(3 "Ed 13, age FE") label(4 "Ed 10, age FE"))
graph export "$figures/income_fit_v2.pdf", as(pdf) replace

replace pred_rea = pred_rea + ($s_rea)^2/2 + $fix_rea
replace pred_abi = pred_abi + ($s_abi)^2/2 + $fix_abi

outsheet age pred_rea pred_abi using "$data/processed/IncomeProcess/trends_v2.csv", comma replace



